// cd /projects/hsieh_project/proj_201809/code_2_201910/
// qstata out_ind_deco.do &

// ---- Session: wide log lines, fresh text log, empty workspace ----------------
set linesize 255
capture log close
log using "/projects/hsieh_project/proj_201809/code_2_201910/out_ind_deco_log", replace text

clear all
cd "/projects/"

// Stamp the log with the current time and date
display "$S_TIME $S_DATE"

// Run date formatted with %tdYYNNDD
global rev_date: display %tdYYNNDD date("$S_DATE", "DMY")
display "${rev_date}"

// ---- Directory layout (all paths hang off dir_proj) --------------------------
global dir_proj "/projects/hsieh_project/proj_201809/"

global dir_do "${dir_proj}/code_2_201910/"
global dir_data "${dir_proj}/data/"
global dir_out "${dir_proj}/output/201910_main/ind_deco/"
global dir_fig "${dir_out}/figure"

// Create the output folders; capture swallows the error when they already exist
noisily capture mkdir "${dir_out}"
noisily capture mkdir "${dir_out}/data/"
capture mkdir "${dir_fig}"

// ---- Inputs and sample window ------------------------------------------------
global ds_ind "${dir_data}/ind_sum_all_add"

global year1 1977
global year2 2013

global gl_perc "10"
local l_perc "10"

// ---- Helper programs defined in other do-files -------------------------------
do "/projects/hsieh_project/code_0_general/f_rounding.do"
do "/projects/hsieh_project/code_0_general/f_reg.do"
do "${dir_proj}/code_0_general/p_weight.do"

// ---- Output targets ----------------------------------------------------------
global ds_out "${dir_out}/ind_deco"
global xlsx_out "${dir_out}/ind_deco.xlsx"
global xlsx_lpoly "${dir_out}/lpoly_ind_reg"

// Market definitions looped over by the decomposition sections
global glmkt "fips msa83 zipcode"
*global vmsacz = "msacz"

//==============================================================================
// Define the program that runs the OLS regression

/*
--------------------------------------------------------------------------------
Decomposition of change in ln(emp share of top firms)
Extensive margin: change in ln(# of markets of top / all firms)
Intensive margin: change in ln(emp per market of top / all firms)

i_sect = subset of sectors to use
i_perc = top i_perc% ind firms
i_dv = dependent variable
i_iv = independent variable
*/

/*
Note: 03.16.2021: Might need to address weighting for within-sector regressions/graphs*/

// lbd_ols: weighted OLS of <i_dv>_d_<i_perc> on <i_iv>_<i_perc> for year1
// observations, with the result appended as one row of frame fr_reg.
//
// Positional arguments
//   i_sect  sector selector. Empty or 0 pools every sector; 31 keeps sectors
//           3, 5 and 6; 32 keeps all other sectors; any other number keeps the
//           rows whose sector equals that number.
//   i_perc  percentile suffix of the variables to use (callers pass 10 or 1)
//   i_dv    stem of the dependent variable
//   i_iv    stem of the regressor
//
// Depends on p_weight, p_y1y2_d and f_reg_append (defined in other do-files),
// on globals year1 and year2, and on an existing frame fr_reg that has the
// variables sector and perc. The data in memory are preserved on entry and
// restored on exit.
capture program drop lbd_ols
program lbd_ols
	args i_sect i_perc i_dv i_iv
	
	preserve
	
	// Potential subsetting: recode the requested group of sectors to a single
	// code, then keep only that code
	if "`i_sect'" == "" local i_sect = "0"
	if inlist("`i_sect'", "0") {
		// Use all sectors
		replace sector = 0
	}
	if inlist("`i_sect'", "31") {
		// Use services, retail, wholesale
		replace sector = 31 if inlist(sector, 3, 5, 6)
	}
	if inlist("`i_sect'", "32") {
		// Use sectors except services, retail, wholesale
		replace sector = 32 if ! inlist(sector, 3, 5, 6)
	}
	di "`i_dv' `i_sect'"
	
	keep if inlist(sector, `i_sect')
	
	if ! inlist(`i_sect', 0) {
		// Recalculate weight with selected industries.
		// The regression below needs the regressor and the w_sv weight, so both
		// must survive this keep; the list is de-duplicated because the
		// regressor may coincide with the employment-share variable.
		local keepvars year ch_ind miss_`i_perc' emp_ind `i_dv'_`i_perc' ///
			ln_emps_ind_d_`i_perc' `i_iv'_`i_perc' w_emps_`i_perc' w_sv_`i_perc'
		local keepvars : list uniq keepvars
		keep `keepvars'
		// Move the old weight out of the way before p_weight is called again
		rename w_emps_`i_perc' ow_emps_`i_perc'
		p_weight ${year1} ${year2} `i_perc' 0
	}
	
	// Build the dependent variable <i_dv>_d_<i_perc> used in the regression
	p_y1y2_d "`i_dv'" `i_perc' ${year1} ${year2}
	reg `i_dv'_d_`i_perc' `i_iv'_`i_perc' [aw=w_sv_`i_perc'] if year == ${year1}
	f_reg_append "fr_reg" "`i_iv'_`i_perc'"
	
	// Tag the last row of the result frame with the sample that produced it
	frame fr_reg {
		replace sector = `i_sect' if _n == _N
		replace perc = `i_perc' if _n == _N
	}
	
	// Explicit restore (Stata would also restore automatically at program end)
	restore
end

//------------------------------------------------------------------------------
// Program that renames msacz (var name too long)

// f_rename: shorten market tags inside variable names.
//   *msa1983cz*  ->  tag taken from global vmsacz, or "msacz" when it is empty
//   *msa1983*    ->  msa83
// Takes no arguments and renames variables of the dataset in memory in place.
// The cz pass must run first; otherwise the second pattern would also match
// the msa1983cz names.
capture program drop f_rename
program f_rename
	// Fall back to "msacz" when the global is not defined, so the tag is
	// shortened rather than deleted from the name
	local cz_tag "${vmsacz}"
	if "`cz_tag'" == "" local cz_tag "msacz"
	
	foreach ivar of var *msa1983cz* {
		local ivar_new = regexr("`ivar'", "msa1983cz", "`cz_tag'")
		rename `ivar' `ivar_new'
	}
	
	foreach ivar of var *msa1983* {
		local ivar_new = regexr("`ivar'", "msa1983", "msa83")
		rename `ivar' `ivar_new'
	}
end

//==============================================================================
// Decomposition (employment)

// Result frame: one row per regression, tagged with sector and percentile
f_reg_create "fr_reg" "1"
frame fr_reg {
	gen sector = .
	gen perc = .
	order sector perc
}

// One pass per top-firm percentile. Weights, the year filter and the weights
// file all follow the year1 / year2 globals so they cannot drift apart.
foreach i_perc in 10 1 {
	di "Percentile `i_perc'"
	use "${ds_ind}", clear
	f_rename
	p_weight ${year1} ${year2} "`i_perc'"

	// Keep the two endpoint years, then attach the SV weights; every
	// observation must find a match on ch_ind
	keep if inlist(year, ${year1}, ${year2})
	merge m:1 ch_ind using "${dir_proj}/output/202011_main/sv_weights/sv_weights_${year1}_${year2}", ///
		assert(match) nogen keepusing(w_num_`i_perc' w_sv_`i_perc')

	//--------------------------------------------------------------------------
	// Run ols regressions

	lbd_ols 0 `i_perc' "ln_est_ind_r" "ln_emps_ind_d"
	lbd_ols 0 `i_perc' "ln_est_ind_rc" "ln_emps_ind_d"

	foreach vmkt in $glmkt {
		// The market tag is also published as a global, as in the other sections
		global vmkt = "`vmkt'"
		lbd_ols 0 `i_perc' "ln_${vmkt}_ind_r2" "ln_emps_ind_d"
		lbd_ols 0 `i_perc' "ln_${vmkt}_ind_r2c" "ln_emps_ind_d"
		lbd_ols 0 `i_perc' "ln_mkt_${vmkt}_r" "ln_emps_ind_d"
	}
}

f_reg_save "fr_reg" "${dir_out}/reg_ind_deco"

//==============================================================================
// Decomposition (sales)

// Result frame for the sales regressions: one row per regression
f_reg_create "fr_reg" "1"
frame fr_reg {
	gen sector = .
	gen perc = .
	order sector perc
}

// One pass per top-firm percentile. Weights, the year filter and the weights
// file all follow the year1 / year2 globals so they cannot drift apart.
foreach i_perc in 10 1 {
	di "Percentile `i_perc'"
	use "${dir_data}/sales_ind_sum_add", clear
	f_rename
	keep if sales_miss_`i_perc' != 1

	// Attach the SV weights and keep matched observations only
	merge m:1 ch_ind using "${dir_proj}/output/202011_main/sv_weights/sv_weights_${year1}_${year2}", ///
		keepusing(w_num_`i_perc' w_sv_`i_perc') keep(match) nogenerate

	// Blank the weight where n_ind is below 100 / percentile
	replace w_sv_`i_perc' = . if n_ind < (100 / `i_perc')

	p_weight ${year1} ${year2} "`i_perc'"
	keep if inlist(year, ${year1}, ${year2})

	// Build the regressor ln_saless_ind_d_<perc> used by the calls below
	p_y1y2_d "ln_saless_ind" `i_perc' ${year1} ${year2}

	//--------------------------------------------------------------------------
	// Run ols regressions

	lbd_ols 0 `i_perc' "ln_est_ind_r" "ln_saless_ind_d"
	lbd_ols 0 `i_perc' "ln_est_ind_rc" "ln_saless_ind_d"

	foreach vmkt in $glmkt {
		global vmkt = "`vmkt'"
		lbd_ols 0 `i_perc' "ln_${vmkt}_ind_r2" "ln_saless_ind_d"
		lbd_ols 0 `i_perc' "ln_${vmkt}_ind_r2c" "ln_saless_ind_d"
	}
}

f_reg_save "fr_reg" "${dir_out}/reg_ind_deco_sales"

//==============================================================================
// Define the program that runs the lpoly regression
//==============================================================================
// lbd_lpoly: local polynomial smooth (lpoly) of <y>_d_<i_perc> against x,
// weighted by w_sv_<i_perc>. Saves the rounded input data, exports the smooth
// to an Excel sheet, and draws a figure with a 99% band.
//
// Positional arguments
//   i_sect  sector selector; empty is treated as 0 (all sectors pooled); 12, 31
//           and 32 recode groups of sectors before the filter (see below)
//   i_perc  percentile suffix used in variable, sheet and file names
//   y       stem of the outcome variable
//   y_lab   text for the figure title and the y-axis title
//   x       name of the smoothing variable, used exactly as passed
//   x_lab   text for the x-axis title
//   t_samp  text printed after "Sample: " in the figure note
//
// Reads globals xlsx_lpoly, dir_out, dir_fig, year1 and year2, and calls
// p_weight, p_y1y2_d and rounding_4dig, none of which are defined in this block.
// The data in memory are preserved before subsetting and restored at the end.
capture program drop lbd_lpoly
program lbd_lpoly
	args i_sect i_perc y y_lab x x_lab t_samp
	
	if "`i_sect'" == "" local i_sect = "0"
	
	// Pick the Excel workbook from the name of x. If x matches neither pattern
	// the global keeps whatever value it had before this call.
	if regexm("`x'", "emps") {
		global xlsx_lpoly_sel "${xlsx_lpoly}_emps_`i_perc'.xlsx"
	}
	
	if regexm("`x'", "saless") {
		global xlsx_lpoly_sel "${xlsx_lpoly}_saless_`i_perc'.xlsx"
	}
	
	// NOTE(review): with the assignment below commented out, i_bwidth is never
	// set, so the lpoly call passes an empty bwidth(). Presumably lpoly then
	// uses its default bandwidth - confirm this is intended.
	//local i_bwidth = 0.06
	
	
	//--------------------------------------------------------------------------
	// Figures specs
	/*Code assigning x/y plotting ranges and steps redacted */
	// The values below are redaction placeholders, not usable numbers; the
	// inrange() filter and the clipping of the band further down read them.
	local x_min = "REDACTED"
	local x_max = "REDACTED"
	local y_min = "REDACTED"
	local y_max = "REDACTED"
	local y_step = "REDACTED"
    /*
	if inlist("`y'", "ln_emps_aggin") {
		local x_min = "REDACTED"
		local x_max = "REDACTED"
		local y_min = "REDACTED"
		local y_max = "REDACTED"
		if `i_sect' == "REDACTED" local y_max = "REDACTED"
		local y_step = "REDACTED"
	}
	if inlist("`y'", "ln_estn_inda") {
		local x_min = "REDACTED"
		local x_max = "REDACTED"
		local y_min = "REDACTED"
		local y_max = "REDACTED"
		local y_step = "REDACTED"
	}
	if regexm("`y'", "ln_(est|fips|msa)_ind_r2?$") {
		local x_min = "REDACTED"
		local x_max = "REDACTED"
		local y_min = "REDACTED"
		local y_max = "REDACTED"
		if regexm("`y'", "ln_(est)_ind_r2?$") local y_max = "REDACTED"
	}
	if regexm("`y'", "ln_(est|fips|msa)_ind_r2?c$") {
		local x_min = "REDACTED"
		local x_max = "REDACTED"
		local y_min = "REDACTED"
		local y_max = "REDACTED"
		local y_step = "REDACTED"
	}
    */
	
	preserve
	
	//--------------------------------------------------------------------------
	// Potential subsetting (same as above)
	// Each branch recodes the requested group of sectors to one code; the keep
	// below then retains only rows carrying that code.
	if inlist("`i_sect'", "0") {
		replace sector = 0
	}
	if inlist("`i_sect'", "12") {
		replace sector = 12 if ! inlist(sector, 1)
	}
	if inlist("`i_sect'", "31") {
		replace sector = 31 if inlist(sector, 3, 5, 6)
	}
	if inlist("`i_sect'", "32") {
		replace sector = 32 if ! inlist(sector, 3, 5, 6)
	}
	di "`y' `i_sect'"
	
	keep if inlist(sector, `i_sect')
	
	if ! inlist(`i_sect', 0) {
		// Recalculate weight with selected industries
		// NOTE(review): neither keep list below retains x or the w_sv weight,
		// both of which are used after this branch; unless p_weight recreates
		// them, a non-zero sector run would fail - confirm before using it.
		if "`y'" == "ln_emps_aggin" {
			// Park the original aggregate under an o_ prefix so it survives the
			// p_weight call and can be put back afterwards
			rename ln_emps_aggin_`i_perc' o_ln_emps_aggin_`i_perc'
			keep year ch_ind miss_`i_perc' emp_ind o_`y'_`i_perc' ln_emps_ind_d_`i_perc' w_emps_`i_perc'
		} 
		else {
			keep year ch_ind miss_`i_perc' emp_ind `y'_`i_perc' ln_emps_ind_d_`i_perc' w_emps_`i_perc'
		}
		// Move the old weight aside before p_weight is called again
		rename w_emps_`i_perc' ow_emps_`i_perc'
		p_weight ${year1} ${year2} `i_perc' 0
		if "`y'" == "ln_emps_aggin" {
			// The variable present after p_weight gets an _s suffix and the
			// parked original returns to its own name
			rename ln_emps_aggin_`i_perc' ln_emps_aggin_`i_perc'_s
			rename o_ln_emps_aggin_`i_perc' ln_emps_aggin_`i_perc'
		}
	}
	
	//--------------------------------------------------------------------------
	// Calculate difference
	p_y1y2_d `y' `i_perc' ${year1} ${year2}
	
	// NOTE(review): x is used without a percentile suffix here, while lbd_ols
	// appends the suffix to its regressor. Callers pass stems such as
	// ln_emps_ind_d, so this presumably relies on variable-name abbreviation
	// resolving to a single variable - confirm.
	keep `y'_d_`i_perc' `x' w_emps_`i_perc' w_sv*
	duplicates drop
	
	* ROUNDING TO 4SF TO BE DISCLOSURE-COMPLIANT *
	// Summaries before and after rounding go to the log for comparison
	sum `y'_d_`i_perc' `x'
	rounding_4dig "`y'_d_`i_perc' `x'"
	sum `y'_d_`i_perc' `x'
	
	// Rounded input data are saved alongside the results
	save "${dir_out}/data/`y'_`i_perc'_`x'", replace
    
	// Run lpoly
	// Evaluated on 100 grid points without drawing; gen() stores the grid and
	// the smooth, se() stores the standard errors
	lpoly `y'_d_`i_perc' `x' [aweight=w_sv_`i_perc'], nograph ///
		bwidth(`i_bwidth') n(100) ///
		gen(`y'_d_`i_perc'_x `y'_d_`i_perc'_s) se(`y'_d_`i_perc'_se) ci level(99)
		
	// Only the grid, smooth and standard error are kept, rounded and exported
	keep `y'_d_`i_perc'_x `y'_d_`i_perc'_s `y'_d_`i_perc'_se
	rounding_4dig `y'_d_`i_perc'_x
	rounding_4dig `y'_d_`i_perc'_s
	rounding_4dig `y'_d_`i_perc'_se
	export excel "${xlsx_lpoly_sel}", sheet(`y'_s`i_sect') sheetreplace firstrow(var) keepcellfmt 
	
	//--------------------------------------------------------------------------
	// Create figure
	// Band is smooth +/- 2.58 standard errors, matching the 99% level used above
	gen `y'_d_`i_perc'_ciu = `y'_d_`i_perc'_s + 2.58 * `y'_d_`i_perc'_se
	gen `y'_d_`i_perc'_cil = `y'_d_`i_perc'_s - 2.58 * `y'_d_`i_perc'_se
	
	// Restrict to the plotting window and clip the band at its edges; the
	// missing-value guard keeps a missing upper bound from being overwritten
	keep if inrange(`y'_d_`i_perc'_x, `x_min', `x_max') & inrange(`y'_d_`i_perc'_s, `y_min', `y_max')
	replace `y'_d_`i_perc'_ciu = `y_max' if `y'_d_`i_perc'_ciu > `y_max' & `y'_d_`i_perc'_ciu != .
	replace `y'_d_`i_perc'_cil = `y_min' if `y'_d_`i_perc'_cil < `y_min'
	
	/*
	twoway (rarea `y'_d_`i_perc'_cil `y'_d_`i_perc'_ciu `y'_d_`i_perc'_x, fc(none) lwidth(thin) lcolor(gs5)) ///
		(connected `y'_d_`i_perc'_s `y'_d_`i_perc'_x, m(i) lcolor(navy)), ///
		legend(off label(1 "99% CI") label(2 "lpoly smooth")) ///
		ylabel(`y_min'(`y_step')`y_max') xlabel(`x_min'(0.25)`x_max') aspect(.) ///
		yscale(noextend) xscale(noextend) ///
		title("`y_lab'") subtitle("99% CI") ytitle("`y_lab'") xtitle("`x_lab'") note("Sample: `t_samp'")
    */
	// Active version of the figure: same layers as the disabled one above but
	// without explicit axis labels
    twoway (rarea `y'_d_`i_perc'_cil `y'_d_`i_perc'_ciu `y'_d_`i_perc'_x, fc(none) lwidth(thin) lcolor(gs5)) ///
		(connected `y'_d_`i_perc'_s `y'_d_`i_perc'_x, m(i) lcolor(navy)), ///
		legend(off label(1 "99% CI") label(2 "lpoly smooth")) ///
		yscale(noextend) xscale(noextend) ///
		title("`y_lab'") subtitle("Top `i_perc'% Firms, 99% CI") ytitle("`y_lab'") xtitle("`x_lab'") note("Sample: `t_samp'")
	// The PNG is written only when Stata is not running in batch mode
	if c(mode) != "batch" graph export "${dir_fig}/lpoly_Y_`y'_X_`x'_s`i_sect'_p`i_perc'.png", replace height(2500) width(3000)
    
	restore
end

//==============================================================================
// Decomposition (employment)

// One pass per top-firm percentile. Weights, the year filter and the weights
// file all follow the year1 / year2 globals so they cannot drift apart.
foreach i_perc in 10 1 {
	use "${ds_ind}", clear
	f_rename
	p_weight ${year1} ${year2} "`i_perc'"

	// Keep the two endpoint years (filtered once), then attach the SV weights;
	// every observation must find a match on ch_ind
	keep if inlist(year, ${year1}, ${year2})
	merge m:1 ch_ind using "${dir_proj}/output/202011_main/sv_weights/sv_weights_${year1}_${year2}", ///
		assert(match) nogen keepusing(w_num_`i_perc' w_sv_`i_perc')

	//--------------------------------------------------------------------------
	// Run lpoly regressions

	local x_lab = "d log(Emp Top/Emp Ind)"
	lbd_lpoly 0 `i_perc' "ln_est_ind_r" "d ln(Est Top/Est Ind)" "ln_emps_ind_d" "`x_lab'"
	lbd_lpoly 0 `i_perc' "ln_est_ind_rc" "d ln(Est Top/Est Ind)" "ln_emps_ind_d" "`x_lab'"

	foreach vmkt in $glmkt {
		// The market tag is also published as a global, as in the other sections
		global vmkt = "`vmkt'"
		lbd_lpoly 0 `i_perc' "ln_${vmkt}_ind_r2" "d ln(${vmkt} Top/${vmkt} Ind)" "ln_emps_ind_d" "`x_lab'"
		lbd_lpoly 0 `i_perc' "ln_${vmkt}_ind_r2c" "d ln(${vmkt} Top/${vmkt} Ind)" "ln_emps_ind_d" "`x_lab'"
	}
}


//==============================================================================
// Decomposition (sales): lpoly regressions
// One pass per top-firm percentile. Weights, the year filter and the weights
// file all follow the year1 / year2 globals so they cannot drift apart.
foreach i_perc in 10 1 {
	use "${dir_data}/sales_ind_sum_add", clear
	f_rename
	keep if sales_miss_`i_perc' != 1
	p_weight ${year1} ${year2} "`i_perc'"
	keep if inlist(year, ${year1}, ${year2})

	// Build ln_saless_ind_d_<perc>, the x variable of the smooths below
	p_y1y2_d "ln_saless_ind" `i_perc' ${year1} ${year2}

	// Attach the SV weights and keep matched observations only
	merge m:1 ch_ind using "${dir_proj}/output/202011_main/sv_weights/sv_weights_${year1}_${year2}", ///
		keepusing(w_num_`i_perc' w_sv_`i_perc') keep(match) nogenerate

	// Blank the weight where n_ind is below 100 / percentile
	replace w_sv_`i_perc' = . if n_ind < (100 / `i_perc')

	//--------------------------------------------------------------------------
	// Run lpoly regressions

	local x_lab = "d log(Sales Top/Sales Ind)"
	lbd_lpoly 0 `i_perc' "ln_est_ind_r" "d ln(Est Top/Est Ind)" "ln_saless_ind_d" "`x_lab'"
	lbd_lpoly 0 `i_perc' "ln_est_ind_rc" "d ln(Est Top/Est Ind)" "ln_saless_ind_d" "`x_lab'"

	foreach vmkt in $glmkt {
		global vmkt = "`vmkt'"
		lbd_lpoly 0 `i_perc' "ln_${vmkt}_ind_r2" "d ln(${vmkt} Top/${vmkt} Ind)" "ln_saless_ind_d" "`x_lab'"
		lbd_lpoly 0 `i_perc' "ln_${vmkt}_ind_r2c" "d ln(${vmkt} Top/${vmkt} Ind)" "ln_saless_ind_d" "`x_lab'"
	}
}

// Close the log opened at the top of the file; capture keeps this from failing
// when no log is open
capture noi log close
// End of do file
